Animation of graphs in R
Using previous age-sex pyramid based on 2021 data, to apply appropriate interactivity and animation methods to design an age-sex pyramid based data visualisation to show the changes of demographic structure of Singapore by age cohort and gender between 2000-2020 at planning area level. The data set used is entitle Singapore Residents by Planning Area / Subzone, Age Group, Sex and Type of Dwelling, June 2000-2010 and Singapore Residents by Planning Area / Subzone, Age Group, Sex and Type of Dwelling, June 2011-2020, from Department of Statistics home page.
packages = c('tidyverse', 'readxl', 'ggthemes')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
pop_data <- read_csv("data/respopagesextod2021.csv")
glimpse (data)
function (..., list = character(), package = NULL, lib.loc = NULL,
verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE)
To sort the age-sex pyramid using Age Group, we need to classify the AG field as Factor
summary_sex <- pop_data %>%
group_by(AG, Sex) %>%
summarise(Pop = sum(Pop)) %>%
ungroup()
head (summary_sex,5)
# A tibble: 5 x 3
AG Sex Pop
<fct> <chr> <dbl>
1 0_to_4 Females 87730
2 0_to_4 Males 91400
3 5_to_9 Females 97120
4 5_to_9 Males 102390
5 10_to_14 Females 97980
ggplot(summary_sex, aes(x=AG)) +
geom_bar(data=summary_sex[summary_sex$Sex=="Males",], aes(y=Pop*-1), stat="identity", fill="blue") +
geom_bar(data=summary_sex[summary_sex$Sex=="Females",], aes(y=Pop), stat="identity", fill="pink") +
geom_hline(yintercept=0, colour="white", lwd=1)+
coord_flip () +
scale_y_continuous(breaks = seq(-160000,160000,40000), labels = function(v) ifelse(abs(v)>=1000,paste0(abs(v)/1000, "K"), abs(v))) +
labs(y="Population", x="Gender") +
ggtitle(" Male Female")
For animation of population across time, data source can be found here at singstat website.
Download 2 sets of data, 1) year 2000 to 2010, and 2) year 2011 to 2020 and combine into 1 set of data.
Check if the header has been copied into the data set by finding out the unique time brackets
year2000 <- read_csv("data/respopagesextod2000to2010.csv")
year2011 <- read_csv("data/respopagesextod2011to2020.csv")
head (year2000,3)
# A tibble: 3 x 7
PA SZ AG Sex TOD Pop Time
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 Ang Mo Kio Cheng San 0_to_4 Males HDB 1- and 2-Room Fla~ 20 2000
2 Ang Mo Kio Cheng San 0_to_4 Males HDB 3-Room Flats 480 2000
3 Ang Mo Kio Cheng San 0_to_4 Males HDB 4-Room Flats 220 2000
head (year2011,3)
# A tibble: 3 x 7
PA SZ AG Sex TOD Pop Time
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 Ang Mo Kio Ang Mo Kio Town Centre 0_to_4 Males HDB 1- a~ 0 2011
2 Ang Mo Kio Ang Mo Kio Town Centre 0_to_4 Males HDB 3-Ro~ 10 2011
3 Ang Mo Kio Ang Mo Kio Town Centre 0_to_4 Males HDB 4-Ro~ 30 2011
# Since columns are the same, we can combine the 2 files into 1 file for processing
combined <- rbind(year2000,year2011)
unique(combined$Time)
[1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
[14] 2013 2014 2015 2016 2017 2018 2019 2020
# write_csv(combined, "combined.csv")
# in the Time column, there are only numbers, hence the row header was not copied into the data
To sort the age-sex pyramid using Age Group, we need to classify the AG field as Factor
To plot the graph over the different years, we need to call out the Time field as a column (variable)
summary_sex_20 <- combined %>%
group_by(AG, Sex, Time) %>%
summarise(Pop = sum(Pop)) %>%
ungroup()
head (summary_sex_20,5)
# A tibble: 5 x 4
AG Sex Time Pop
<fct> <chr> <dbl> <dbl>
1 0_to_4 Females 2000 108850
2 0_to_4 Females 2001 107510
3 0_to_4 Females 2002 105310
4 0_to_4 Females 2003 101430
5 0_to_4 Females 2004 99290
Using the individual Age-sex pyramid from above (plotted for year 2021), we re-use the code to plot out 20 pyramid graphs, 1 graph for each year.
ggplot(summary_sex_20, aes(x=AG)) +
geom_bar(data=summary_sex_20[summary_sex_20$Sex=="Males",], aes(y=Pop*-1), stat="identity", fill="blue") +
geom_bar(data=summary_sex_20[summary_sex_20$Sex=="Females",], aes(y=Pop), stat="identity", fill="pink") +
geom_hline(yintercept=0, colour="white", lwd=1)+
coord_flip () +
scale_y_continuous(breaks = seq(-160000,160000,40000), labels = function(v) ifelse(abs(v)>=1000,paste0(abs(v)/1000, "K"), abs(v))) +
labs(title = "Age-Sex Population Pyramid, Singapore 2021",
caption = 'Data Source: Department of Statistics (June 2021)',
y = "Population", x = "Gender") +
theme_bw() +
theme(legend.position = "none")+
theme(plot.title = element_text(size=16))+
theme(plot.subtitle = element_text(size=12))+
facet_wrap(. ~ `Time`,ncol=4)
It is clear from the 20 graphs displayed, that the difference in population year on year is not clear. To show more clarity, we use the year as base to transition the graph in 1 frame in the next section.
but first, we enhance the graph by
adding title caption theme find out the maximum and minimum values of the population to set the chart axis to ensure all the values will be captured properly.
We call out the package required which is ggaminate.
packages = c('gganimate')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
SG20 <- ggplot(summary_sex_20, aes(x=AG,colour=Sex,fill=Sex)) +
geom_bar(data=summary_sex_20[summary_sex_20$Sex=="Males",], aes(y=Pop*-1), stat="identity") +
geom_bar(data=summary_sex_20[summary_sex_20$Sex=="Females",], aes(y=Pop), stat="identity") +
geom_hline(yintercept=0, colour="white", lwd=1) +
coord_flip() +
scale_y_continuous(limits = c(-170000, 170000), n.breaks = 10, labels = function(v) ifelse(abs(v)>= 1000,paste0(abs(v)/1000, "K"), abs(v))) +
labs(title = "Singapore Age-Sex Population Pyramid for 20 years",
subtitle = 'Year: "{round(frame_time, 0)}"',
caption = 'Data Source: Department of Statistics (June 2000 to June 2020)',
y = 'Male and Female Population',
x = 'Age Group') +
theme_bw () +
theme(legend.text = element_text(size=12))+
theme(plot.title = element_text(size=16))+
theme(plot.subtitle = element_text(size=10))
SG20
SG20 +
transition_time(Time) +
ease_aes('linear')
Interactive plots help to us compare the same point across 2 graphs
Loading packages for interactive plots
packages = c('tidyverse', 'readxl', 'ggthemes', 'ggiraph', 'plotly',
'gganimate', 'patchwork', 'DT', 'gifski', 'gapminder')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
interactive_data <- combined %>%
spread (Sex, sum(Pop)) %>%
mutate(Total = Females + Males) %>%
group_by(Time, AG, PA) %>%
filter(PA == c("Ang Mo Kio", "Marine Parade")) %>%
summarise(Female = sum(Females),
Male =sum(Males),
Total = sum(Total))
interactive_data$AG <- factor(interactive_data$AG, levels = unique(interactive_data$AG))
interactive_data <- interactive_data %>%
mutate(Female_prop = Female / Total*100) %>%
mutate(Male_prop = Male / Total*100)
head (interactive_data,5)
# A tibble: 5 x 8
# Groups: Time, AG [3]
Time AG PA Female Male Total Female_prop Male_prop
<dbl> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2000 0_to_4 Ang Mo Kio 3020 3230 6250 48.3 51.7
2 2000 0_to_4 Marine Para~ 280 280 560 50 50
3 2000 5_to_9 Ang Mo Kio 4010 4030 8040 49.9 50.1
4 2000 5_to_9 Marine Para~ 320 270 590 54.2 45.8
5 2000 10_to_14 Ang Mo Kio 3780 3830 7610 49.7 50.3
d <- highlight_key(interactive_data)
p1 <- ggplot (data = d,
aes(x = Time,
y = Total)) +
geom_col() +
labs(title = 'Population across time in 2 Planning areas') +
facet_wrap(. ~ `PA`)
p1
p2 <- ggplot (data = d,
aes(x =AG,
y = Total)) +
geom_point() +
labs(title = 'Total population across Age group in the Planning areas for 20 years')+
facet_wrap(. ~ `PA`) +
coord_flip()
p2
Try clicking on 1 graph to see where the point is on the next graph
subplot (ggplotly (p1),
ggplotly (p2))
Try clicking on the table to see where is the point in the graph
Agegroup_PA <- combined %>%
group_by(AG, PA) %>%
summarise (Pop = sum (Pop))
d <- highlight_key(Agegroup_PA)
p3 <- ggplot (data = d, aes(x = AG, y = Pop))+
geom_col () +
ggtitle ("Age groups in Singapore's Planning Area",
subtitle = 'Planning Area: {PA}') +
coord_flip()
gg <- highlight(ggplotly(p3),
"plotly_selected")
crosstalk::bscols(gg,
DT::datatable(d),
widths = 20)
Plotting the graph to show the number of population across the age group by Planning area for 20 years (2000 - 2020).
animate1 <- ggplot (Agegroup_PA, aes(x = AG, y = Pop/1000))+
geom_col () +
coord_flip() +
ggtitle('Planning area: {closest_state}') +
labs (x = 'AG',
y = 'Population (thousand)') +
transition_states (PA) +
ease_aes('linear') +
enter_fade() +
exit_fade()
animate(animate1,fps=2)